* Client crosswalk (WR): directory configuration and log setup.
* Folder globals defined here:
*   input  - monthly SAS extracts named ap_YYYYMM.sas7bdat, read by the import loop
*   input2 - not referenced in the code visible in this file
*   input3 - not referenced in the code visible in this file
*   temp   - per-month intermediate .dta files (written, appended, then erased)
*   log    - folder that receives the Stata log
*   output - folder that receives the final client_crosswalk dataset
global input "Q:\dc1prhcmsas01\PU2\data - sas"
global input2 "Q:\dc1prhcmsas01\PU2\New folder"
global input3 "Q:\dc1prhcmsas01\PU2\temp"
global temp "Q:\dc1prhcmsas01\PU2\temp_stata_wr"
global log "Q:\dc1prhcmsas01\PU2\Log - Stata WR"
global output "Q:\dc1prhcmsas01\PU2\data_stata_wr"

* Work from the code folder
cd "Q:\dc1prhcmsas01\PU2\Code - Stata WR"

* Close any log left open by an earlier run (capture ignores the error when
* none is open), then start a fresh log that overwrites the previous one
capture log close
log using "$log/0_client_crosswalk_wr", replace

*Import
* For every month from January 2013 to June 2021, read the SAS extract and save
* a small temp file holding the number of non-terminated employee records per
* yr_month, client_code and NAICS code.
*
* Each month is imported exactly once and without capture, so a missing file or
* a wrong variable name stops the run. Swallowing the error would leave the
* previous month in memory and save it again under the new month name.
forvalues year=2013/2021 {
	forvalues month=1/12 {
		* The data end in June 2021
		if `year'==2021 & `month'>6 {
			continue
		}

		* Extract file names use a two-digit month (ap_YYYYMM)
		local mm : display %02.0f `month'
		local sasfile "$input/ap_`year'`mm'.sas7bdat"

		* Fail early with a clear file-not-found error
		confirm file "`sasfile'"

		* The client id is stored in upper case in some early extracts
		* and in lower case in all the others
		if (`year'==2015 & `month'<=10 & `month'!=4) | (`year'==2013 & `month'<=11) {
			local idvar CLIENT_CODE
		}
		else {
			local idvar client_code
		}
		import sas `idvar' YR_MONTH EMP_STAT_C NAICS_CODE1 using "`sasfile'", case(lower) clear

		*Drop terminated employees
		keep if emp_stat_c!="T"

		*Destring naics and count number of occurrences of each naics within firm
		* force turns non-numeric codes into missing
		destring naics_code1, replace force
		gen n=1
		collapse (count) weight=n, by(yr_month client_code naics_code1)

		compress
		* Temp name keeps the unpadded month so the append and erase steps find it
		save "$temp/temp_`year'`month'", replace
	}
}
	
*Append
* Stack the monthly temp files (January 2013 through June 2021) into a single
* dataset, starting from an empty one
clear
forvalues year=2013/2021 {
	forvalues month=1/12 {
		* Nothing was saved for months after June 2021
		if `year'==2021 & `month'>6 {
			continue
		}
		append using "$temp\temp_`year'`month'"
	}
}

*Most common naics
	* Total the monthly counts for each firm and NAICS code
	collapse (sum) weight, by(client_code naics_code1)
	* Largest total among the non-missing NAICS codes of each firm;
	* max stays missing on rows whose NAICS is missing
	bys client_code: gegen max=max(weight) if naics_code1!=.
	* Number of rows per firm; 1 means a single NAICS value, possibly missing
	bys client_code: gegen count=count(weight)
	* Keep the most common non-missing NAICS, plus firms with no naics ever
	keep if weight==max | count==1

*Encode client_code
	*encode client_code, generate(client_id) // Too many values
	* One row per firm. When several NAICS codes tie for most common, keep the
	* lowest code so the crosswalk is reproducible from run to run
	bysort client_code (naics_code1): keep if _n==1
	* Data are now sorted by client_code, so ids follow that order
	gen client_id = _n
	keep client_code client_id naics_code1
	rename naics_code1 naics
	save "$output/client_crosswalk", replace

*Delete temp files
* Remove every monthly temp file written by the import step
forvalues year=2013/2021 {
	forvalues month=1/12 {
		* No temp file exists for months after June 2021
		if `year'==2021 & `month'>6 {
			continue
		}
		erase "$temp\temp_`year'`month'.dta"
	}
}

* End the log opened at the top of the do-file
log close